package com.dmcb.trade.business.crawlers.article;

import com.dmcb.trade.business.constants.CrawlConstant;
import com.dmcb.trade.business.entities.Article;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.stereotype.Service;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Article parser for Sina Blog pages.
 *
 * <p>Supplies the CSS selectors used to locate the title, publish date, body
 * and cover image of an article, and overrides the publish-date extraction to
 * handle the date text found on these pages (a timestamp that may be wrapped
 * in parentheses).
 *
 * Created by Administrator on 2016/6/16.
 */
@Service
public class XinLangBoKeParser extends BaseParser {

    private static final Logger LOGGER = Logger.getLogger(XinLangBoKeParser.class.getName());

    /** Pattern of the publish timestamp once the surrounding parentheses are removed. */
    private static final String POST_DATE_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /**
     * Returns the site URL handled by this parser.
     *
     * @return the site URL
     */
    @Override
    protected String url() {
        return CrawlConstant.XINLANGBOKE_URL;
    }

    /**
     * Returns the CSS selector for the article title.
     *
     * @return the title selector
     */
    @Override
    protected String titleSelect() {
        return "div.articalTitle h2,div.atcbox>h1,h1.h1_tit";
    }

    /**
     * Returns the CSS selector for the publish date
     * (for example the {@code span.SG_txtc} element inside the title block).
     *
     * @return the publish date selector
     */
    @Override
    protected String dateSelect() {
        return "div.articalTitle>span.SG_txtc,div.artinfo>span";
    }

    /**
     * Returns the CSS selector for the article body.
     *
     * @return the body selector
     */
    @Override
    protected String bodySelect() {
        return "div#sina_keyword_ad_area2,div.articalContent";
    }


    /**
     * Returns the CSS selector for the cover image.
     *
     * @return the cover selector
     */
    @Override
    protected String coverSelect() {
        return "div.articalContent>div img[src]";
    }

    /**
     * Extracts the publish time from the page and stores it on the article.
     *
     * <p>The first element matching {@link #dateSelect()} is read, every
     * {@code (} and {@code )} character is removed, and the remainder is parsed
     * as {@code yyyy-MM-dd HH:mm:ss}. If no element matches, the text is
     * missing or empty, or the text cannot be parsed, the article's post time
     * is left untouched.
     *
     * @param doc     the page document
     * @param article the article to update
     */
    @Override
    protected void setPostDate(Document doc, Article article) {
        Element element = doc.select(dateSelect()).first();
        if (element == null) {
            return;
        }

        // Guard before the first dereference: text(...) is an inherited helper
        // whose null behaviour is not visible here.
        String raw = text(element);
        if (raw == null) {
            return;
        }

        // Strip the parentheses, then trim so stray whitespace around the
        // timestamp does not make parsing fail.
        String time = raw.replace("(", "").replace(")", "").trim();
        if (time.isEmpty()) {
            return;
        }

        // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
        SimpleDateFormat sdf = new SimpleDateFormat(POST_DATE_PATTERN);
        // Strict parsing: reject out-of-range fields instead of silently rolling
        // them over into a different (wrong) date.
        sdf.setLenient(false);
        try {
            Date date = sdf.parse(time);
            article.setPostTime(date);
        } catch (ParseException e) {
            // Best effort: an unparseable date must not abort the crawl; record
            // the offending text so the selector or pattern can be corrected.
            LOGGER.log(Level.WARNING, "Unable to parse post date '" + time + "'", e);
        }
    }

    /**
     * Returns the display name of the source platform.
     *
     * @return the platform name
     */
    @Override
    protected String platform() {
        return "新浪博客";
    }
}
